library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.3 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ ggplot2 3.5.0 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.0
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Load the panteon_s.csv data set directly from the course repository on GitHub.
panteon <- readr::read_csv(
  file = "https://raw.githubusercontent.com/Tomasz-Olczyk/wizualizacjaR/main/podstawy/panteon_s.csv"
)
## Rows: 11341 Columns: 13
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (7): name, countryName, countryCode3, continentName, gender, industry, d...
## dbl (6): LAT, LON, birthyear, L_star, HPI, AverageViews
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Interactive lookup of the map_data() help page.
?map_data

# Country outlines taken from the "world" map database.
poland <- map_data(map = "world", region = "Poland")
germany <- map_data(map = "world", region = "Germany")

# Germany drawn as a red polygon with white borders on a map projection.
ggplot(data = germany, mapping = aes(x = long, y = lat, group = group)) +
  geom_polygon(fill = "red", colour = "white") +
  coord_map()
# (Re)load the panteon_s.csv data set from GitHub.
panteon <- read_csv(
  file = "https://raw.githubusercontent.com/Tomasz-Olczyk/wizualizacjaR/main/podstawy/panteon_s.csv"
)
## Rows: 11341 Columns: 13
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (7): name, countryName, countryCode3, continentName, gender, industry, d...
## dbl (6): LAT, LON, birthyear, L_star, HPI, AverageViews
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Niemcy: miejsca urodzenia znanych osób urodzonych po 1945
# Rows with country code "DEU" and a birth year later than 1945.
niemcy <- filter(panteon, countryCode3 == "DEU", birthyear > 1945)
# install.packages("ggrepel")
library(ggrepel)

# Outline of Poland and the Panteon rows whose country code is "POL".
poland <- map_data(map = "world", region = "Poland")
polska <- filter(panteon, countryCode3 == "POL")

# Base layer: country outline. Points are coloured by gender and sized by
# AverageViews; labels come from ggrepel. Both overlay layers set
# `group = name`, overriding the polygon grouping inherited from the base plot.
p <- ggplot(data = poland, mapping = aes(x = long, y = lat, group = group)) +
  geom_polygon(fill = "white", colour = "black") +
  geom_point(
    data = polska,
    mapping = aes(
      x = LON,
      y = LAT,
      colour = gender,
      size = AverageViews * 10,
      group = name
    ),
    alpha = 0.5
  ) +
  ggrepel::geom_text_repel(
    data = polska,
    mapping = aes(x = LON, y = LAT, label = name, group = name),
    size = 3
  ) +
  # coord_map() +   (projection left disabled)
  theme_void()
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
# Convert the ggplot object `p` to an interactive plotly widget.
plotly::ggplotly(p)
## Warning in geom2trace.default(dots[[1L]][[1L]], dots[[2L]][[1L]], dots[[3L]][[1L]]): geom_GeomTextRepel() has yet to be implemented in plotly.
## If you'd like to see this geom implemented,
## Please open an issue with your example code at
## https://github.com/ropensci/plotly/issues
# Outlines of every region in the "world" map database.
world <- map_data(map = "world")

# Plain world map: white polygons with black borders (stored, not printed).
świat6 <- ggplot(data = world, mapping = aes(x = long, y = lat, group = group)) +
  geom_polygon(colour = "black", fill = "white")
Francja
# Rows with country code "FRA" and a birth year later than 1945.
francja <- filter(panteon, countryCode3 == "FRA", birthyear > 1945)

# World outline with the French rows as semi-transparent points; the legend
# for the point layer is switched off.
fr <- ggplot(data = world, mapping = aes(x = long, y = lat, group = group)) +
  geom_polygon(fill = "white", colour = "black") +
  geom_point(
    data = francja,
    mapping = aes(
      x = LON,
      y = LAT,
      colour = gender,
      size = AverageViews / 1000,
      group = name
    ),
    alpha = 0.5,
    show.legend = FALSE
  ) +
  theme_void()

# Interactive version of the map.
library(plotly)
ggplotly(fr)
Chcemy narysować mapę z miejscami urodzenia osób ze zbioru Panteon urodzonych na terenie Polski po 1945.
# `Polska` (capitalised): Panteon rows with country code "POL" born after 1945.
Polska <- filter(panteon, countryCode3 == "POL", birthyear > 1945)

# `polska` (lower case): outline of Poland from the "world" map database.
polska <- map_data(map = "world", region = "Poland")

# The outline on its own.
ggplot(data = polska, mapping = aes(x = long, y = lat, group = group)) +
  geom_polygon(colour = "black", fill = "white")
# Outline of Poland (`polska`) with the rows of `Polska` drawn as points
# (coloured by gender, sized by AverageViews) and labelled by name.
# `alpha` is a constant, so it is set outside aes(): inside aes() ggplot2 treats
# it as a mapped variable, rescales it through the alpha scale instead of using
# the literal value, and adds a spurious "alpha" legend.
ggplot(polska, aes(x = long, y = lat, group = group)) +
  geom_polygon(fill = "white", colour = "black") +
  geom_point(
    data = Polska,
    aes(
      x = LON,
      y = LAT,
      colour = gender,
      size = AverageViews * 10,
      group = name
    ),
    alpha = 0.5
  ) +
  ggrepel::geom_text_repel(
    data = Polska,
    aes(x = LON, y = LAT, label = name, group = name)
  ) +
  theme_void()
## Warning: ggrepel: 4 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
# Fresh copy of the world outlines.
world <- map_data("world")

# World map with the Panteon rows whose birth year is below 1000.
# The outer parentheses print the plot as well as storing it in `świat_p`.
# `alpha` is a constant and therefore set outside aes(); mapping it inside
# aes() would rescale it through the alpha scale and add a meaningless legend.
(świat_p <- ggplot(world, aes(x = long, y = lat, group = group)) +
  geom_polygon(fill = "white", colour = "black") +
  geom_point(
    data = panteon %>% filter(birthyear < 1000),
    aes(
      x = LON,
      y = LAT,
      colour = gender,
      size = AverageViews / 10000,
      group = name
    ),
    alpha = 0.1
  ) +
  theme_void()
)
## Warning: Removed 527 rows containing missing values or values outside the scale range
## (`geom_point()`).
Z wykorzystaniem plotly
# Interactive plotly version of the world map stored in `świat_p`.
plotly::ggplotly(świat_p)
Panteon: próba łączenia ramek danych
# Sum of AverageViews per upper-cased country name. Rows whose country code is
# "unknown" are excluded first, and result rows containing NA are dropped.
widoczność <- panteon %>%
  mutate(region = toupper(countryName)) %>%
  filter(countryCode3 != "unknown") %>%
  group_by(region) %>%
  summarise(widoczność = sum(AverageViews)) %>%
  drop_na()

# Upper-case the map's region names so they can serve as the join key.
world <- mutate(world, region = toupper(region))

# Attach the per-country totals to the map polygons (joined on the shared
# `region` column, as reported by the join message).
świat <- left_join(world, widoczność)
## Joining with `by = join_by(region)`
# Interactive help lookup; no attached package documents `gapminder` here.
help("gapminder")
## No documentation for 'gapminder' in specified packages and libraries:
## you could try '??gapminder'
# Choropleth of the summed views, light-blue to dark-blue gradient.
ggplot(data = świat, mapping = aes(x = long, y = lat, group = group)) +
  geom_polygon(mapping = aes(fill = widoczność), colour = "black") +
  scale_fill_continuous(low = "lightblue", high = "skyblue4")

# Same map with the fill on a natural-log scale, white to orange.
ggplot(data = świat, mapping = aes(x = long, y = lat, group = group)) +
  geom_polygon(mapping = aes(fill = log(widoczność)), colour = "black") +
  scale_fill_continuous(low = "white", high = "orange")

# Untransformed values again, white to orange.
ggplot(data = świat, mapping = aes(x = long, y = lat, group = group)) +
  geom_polygon(mapping = aes(fill = widoczność), colour = "black") +
  scale_fill_continuous(low = "white", high = "orange")
# Shorten two country names in the summary table, presumably to match the
# region names used by the world map data (confirm against `world$region`).
# The named vector applies both replacements in order.
widoczność <- widoczność %>%
  mutate(
    region = str_replace_all(
      region,
      c("UNITED KINGDOM" = "UK", "UNITED STATES" = "USA")
    )
  )

# Upper-case the map region names and join the totals again.
world <- mutate(world, region = toupper(region))
świat <- left_join(world, widoczność)
## Joining with `by = join_by(region)`
# Choropleth after the re-join; the outer parentheses print the plot while
# storing it in `w`.
(w <- ggplot(data = świat, mapping = aes(x = long, y = lat, group = group)) +
  geom_polygon(mapping = aes(fill = widoczność), colour = "black") +
  scale_fill_gradient(low = "lightblue", high = "skyblue4")
)

# Interactive version.
ggplotly(w)
#install.packages("sf")
library(sf)
## Linking to GEOS 3.10.2, GDAL 3.4.2, PROJ 8.2.1; sf_use_s2() is TRUE
Uwaga: pliki shp należy ściągnąć z gadm.org
Dane GeoJSON (punkt niżej) są dostępne przez GitHub
Jeśli mamy na dysku pliki shp możemy wczytać funkcją st_read:
# Example of reading a shapefile from disk; the path is a placeholder.
województwa <- sf::st_read(dsn = "ścieżkadopliku/gadm41_POL_1.shp")
Użyjemy jednak danych typu geojson bo są lżejsze:
Wcześniej trzeba zainstalować bibliotekę geojsonio:
#install.packages("geojsonio")
#install.packages("sf")
library(sf)
library(geojsonio)
## The legacy packages maptools, rgdal, and rgeos, underpinning the sp package,
## which was just loaded, will retire in October 2023.
## Please refer to R-spatial evolution reports for details, especially
## https://r-spatial.org/r/2023/05/15/evolution4.html.
## It may be desirable to make the sf package available;
## package maintainers should consider adding sf to Suggests:.
## The sp package is now running under evolution status 2
## (status 2 uses the sf package in place of rgdal)
## Registered S3 method overwritten by 'geojsonsf':
## method from
## print.geojson geojson
##
## Attaching package: 'geojsonio'
## The following object is masked from 'package:base':
##
## pretty
Wczytać bibliotekę.
Następnie można zaimportować dane bezpośrednio do formatu simple feature:
# County-level (GADM level 2) boundaries of Poland as a simple-features object.
# Downloaded once; the original repeated the identical download three times.
powiaty <- geojson_sf("https://raw.githubusercontent.com/Tomasz-Olczyk/wizualizacjaR/main/mapy/gadm41_POL_2.json")

# Voivodeship-level (GADM level 1) boundaries.
województwa <- geojson_sf("https://raw.githubusercontent.com/Tomasz-Olczyk/wizualizacjaR/main/mapy/gadm41_POL_1.json")

# Election results per voivodeship (turnout and party columns).
pkw_woj <- read_csv("https://raw.githubusercontent.com/Tomasz-Olczyk/wizualizacjaR/main/mapy/pkw_woj.csv")
## Rows: 16 Columns: 7
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): wojewodztwo
## dbl (6): frekwencja, PiS, Konfederacja, KO, TD, Lewica
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Rysujemy powiaty:
# Draw the county boundaries.
ggplot(powiaty) +
  geom_sf()

# Voivodeship boundaries, loaded once here; the original ran the identical
# download twice in a row.
województwa <- geojson_sf("https://raw.githubusercontent.com/Tomasz-Olczyk/wizualizacjaR/main/mapy/gadm41_POL_1.json")
Rysujemy województwa:
# Draw the voivodeship boundaries.
ggplot(data = województwa) + geom_sf()
Dane PKW z poziomu województw via Github
# Voivodeship-level election results read from the course repository.
pkw_woj <- readr::read_csv(
  file = "https://raw.githubusercontent.com/Tomasz-Olczyk/wizualizacjaR/main/mapy/pkw_woj.csv"
)
## Rows: 16 Columns: 7
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): wojewodztwo
## dbl (6): frekwencja, PiS, Konfederacja, KO, TD, Lewica
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Read the pkw_woj.csv election results from GitHub.
pkw_woj <- read_csv(
  file = "https://raw.githubusercontent.com/Tomasz-Olczyk/wizualizacjaR/main/mapy/pkw_woj.csv"
)
## Rows: 16 Columns: 7
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): wojewodztwo
## dbl (6): frekwencja, PiS, Konfederacja, KO, TD, Lewica
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Compact overview of the columns and their types.
pkw_woj %>% glimpse()
## Rows: 16
## Columns: 7
## $ wojewodztwo <chr> "dolnośląskie", "kujawsko-pomorskie", "lubelskie", "lubus…
## $ frekwencja <dbl> 73.97, 71.75, 70.51, 71.37, 76.12, 75.38, 79.27, 66.55, 6…
## $ PiS <dbl> 30.56, 32.27, 47.66, 27.76, 38.11, 41.72, 34.86, 31.26, 5…
## $ Konfederacja <dbl> 6.58, 6.39, 8.14, 6.51, 6.64, 8.02, 6.97, 6.49, 9.14, 9.7…
## $ KO <dbl> 36.00, 32.26, 19.11, 37.73, 29.69, 23.49, 31.50, 33.59, 1…
## $ TD <dbl> 12.48, 15.37, 14.70, 15.07, 13.42, 15.72, 14.83, 12.74, 1…
## $ Lewica <dbl> 10.09, 10.59, 5.68, 9.27, 8.82, 6.93, 8.48, 7.24, 4.71, 4…
# Keep only the name and geometry columns and add a lower-cased name column
# `wojewodztwo`, which matches the key column of `pkw_woj`.
województwa_s <- mutate(
  select(województwa, NAME_1, geometry),
  wojewodztwo = tolower(NAME_1)
)

# Inspect the class of the boundary object.
class(województwa)
## [1] "sf" "data.frame"
# Name + geometry subset of the boundaries with a lower-cased join key.
województwa_s <- mutate(
  select(województwa, NAME_1, geometry),
  wojewodztwo = tolower(NAME_1)
)

# Results table on the left, boundaries on the right; the key is picked
# automatically from the shared column.
wybory_1 <- left_join(x = pkw_woj, y = województwa_s)
## Joining with `by = join_by(wojewodztwo)`
# Boundaries on the left, results on the right (same automatically chosen key).
wybory_2 <- left_join(x = województwa_s, y = pkw_woj)
## Joining with `by = join_by(wojewodztwo)`
# Long format: one row per voivodeship and result column; the column name goes
# to `wyniki`, the number to the default `value` column.
wybory_long <- pivot_longer(
  wybory_2,
  cols = c("frekwencja", "PiS", "Konfederacja", "KO", "TD", "Lewica"),
  names_to = "wyniki"
)

# One small map per result column, filled on a grey-to-red gradient.
ggplot(data = wybory_long) +
  geom_sf(mapping = aes(fill = value, geometry = geometry)) +
  coord_sf() +
  theme_void() +
  scale_fill_gradient(low = "grey90", high = "red3") +
  facet_wrap(~wyniki)
# Standardise (centre and scale) every numeric results column.
# across() replaces the superseded mutate_at() and names the column range
# instead of relying on positions 2-7. as.numeric() strips the one-column
# matrix and scaling attributes that scale() returns, so the columns stay
# plain numeric vectors.
wybory_23_woj_s <- pkw_woj %>%
  mutate(across(frekwencja:Lewica, function(x) as.numeric(scale(x))))

# Long format: one row per voivodeship and standardised column.
wybory_long2 <- wybory_23_woj_s %>%
  pivot_longer(
    cols = c("frekwencja", "PiS", "Konfederacja", "KO", "TD", "Lewica"),
    names_to = "partie"
  )

# Attach the geometries via the lower-cased voivodeship name.
wybory_ls <- full_join(województwa_s, wybory_long2, by = "wojewodztwo")

# One small map per column, filled with the standardised value.
ggplot(wybory_ls) +
  geom_sf(aes(fill = value, geometry = geometry)) +
  coord_sf() +
  theme_void() +
  scale_fill_gradient(low = "grey90", high = "red") +
  facet_wrap(~partie)
Łączenie ramki danych PKW z ramką z konturami województw
# Boundaries joined with the results on the lower-cased voivodeship name.
wybory2 <- left_join(x = województwa_s, y = pkw_woj, by = "wojewodztwo")

# Drop the turnout column and reshape the party columns to long format.
wybory_long <- pivot_longer(
  select(wybory2, -frekwencja),
  cols = c("PiS", "Konfederacja", "KO", "TD", "Lewica"),
  names_to = "wyniki"
)

# One small map per party on a grey-to-black gradient.
ggplot(data = wybory_long) +
  geom_sf(mapping = aes(fill = value)) +
  coord_sf() +
  theme_void() +
  scale_fill_gradient(low = "grey90", high = "black") +
  facet_wrap(~wyniki)
# Voivodeship boundaries only.
ggplot(data = województwa) + geom_sf() + coord_sf()

# Boundaries with the `Polska` rows overlaid as points (coloured by gender,
# sized by AverageViews) and labelled by name.
ggplot(data = województwa) +
  geom_sf() +
  coord_sf() +
  geom_point(
    data = Polska,
    mapping = aes(
      x = LON,
      y = LAT,
      colour = gender,
      size = AverageViews * 10,
      group = name
    ),
    alpha = 0.5
  ) +
  ggrepel::geom_text_repel(
    data = Polska,
    mapping = aes(x = LON, y = LAT, label = name, group = name)
  ) +
  theme_void()
## Warning: ggrepel: 4 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
# Columns 2-7 of the standardised results table.
kor <- select(wybory_23_woj_s, 2:7)

# Pairwise correlations rounded to 5 decimal places, plus the transpose.
korr <- round(x = cor(kor), digits = 5)
korrr <- t(korr)
Biblioteka do korelacji
# install.packages("lares")
library(lares)

# Blue-white-red palette with 20 steps for the correlation heatmap.
# Fixes relative to the original line: typographic quotes replaced with ASCII
# quotes, the two fused statements split, and the undefined `corr` replaced by
# the correlation matrix `korr` computed above.
col <- colorRampPalette(c("blue", "white", "red"))(20)
heatmap(x = korr, col = col, symm = TRUE)
# Correlation matrix of columns 2-7 of the standardised results, rounded to
# 5 decimal places, plus the transpose.
kor <- wybory_23_woj_s %>%
  select(2:7)
korr <- kor %>%
  cor() %>%
  round(digits = 5)
korrr <- t(korr)

# par(oma = c(6, 3, 3, 3))
# par(mar = c(2, 2, 2, 2))

# Heatmap without and with the symmetric-matrix treatment.
heatmap(x = as.matrix(korr), symm = FALSE)
heatmap(x = korr, symm = TRUE)
# library(ggdendro)  # requires ggplot

# Hierarchical clustering on the distances between rows of the correlation
# matrix.
klastry <- hclust(d = dist(x = korr))

# Default dendrogram.
plot(klastry)

# hang = -1 places all labels on one level; cex controls the font size.
plot(klastry, cex = 1, hang = -1)

# Same dendrogram with a custom y-axis label.
plot(
  klastry,
  # type = "triangle",  # triangular shape: reported as obsolete
  ylab = "wysokość",
  hang = -1,
  cex = 1
)
# Built-in data set on arrests in the USA.
data("USArrests")

# Euclidean distances between the standardised rows.
dd <- dist(x = scale(USArrests), method = "euclidean")

# Hierarchical clustering with the "ward.D2" method.
hc <- hclust(d = dd, method = "ward.D2")
#install.packages("ape")
library(ape)
##
## Attaching package: 'ape'
## The following object is masked from 'package:dplyr':
##
## where
# Cladogram of the clustering of the correlation matrix.
plot(
  x = as.phylo(klastry),
  type = "cladogram",
  cex = 0.6,
  label.offset = 0.5
)

# Unrooted tree of the USArrests clustering, drawn without margins.
plot(x = as.phylo(hc), type = "unrooted", cex = 0.6, no.margin = TRUE)

# Fan layout of the same tree.
plot(x = as.phylo(hc), type = "fan")
#install.packages("ggdendro")
library(ggdendro) #wymaga ggplot
library(ggplot2)
# ggplot-based dendrogram of `klastry`, rotated.
ggdendrogram(data = klastry, rotate = TRUE)

# Interactive help lookup for dendro_data().
?ggdendro::dendro_data

# Extract the plotting data for the USArrests clustering ("triangle" type) and
# show the first rows of its label table.
data <- dendro_data(model = hc, type = "triangle")
head(data[["labels"]])
## x y label
## 1 1 0 Alabama
## 2 2 0 Louisiana
## 3 3 0 Georgia
## 4 4 0 Tennessee
## 5 5 0 North Carolina
## 6 6 0 Mississippi
# Hand-built dendrogram: segments from the dendro data, labels from its label
# table (left-aligned), y axis reversed and the axes flipped.
ggplot(data = segment(data)) +
  geom_segment(mapping = aes(x = x, y = y, xend = xend, yend = yend)) +
  scale_y_reverse(expand = c(0.2, 0)) +
  geom_text(
    data = data[["labels"]],
    mapping = aes(x = x, y = y, label = label),
    hjust = 0
  ) +
  theme_dendro() +
  coord_flip()